-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
131 lines (103 loc) · 4.84 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import streamlit as st
import json
from text_extraction import extract_text_from_pdf
from hierarchical_indexing import create_hierarchical_index, index_to_dict
from retrieval import create_faiss_index, retrieve_relevant_text
from question_answering import answer_question
from database import init_db, save_query, get_query_history, save_textbook_structure, get_textbook_structure
# Page-level settings (browser tab title, icon, wide layout). This call is
# issued ahead of every other Streamlit call in the script.
st.set_page_config(
    page_title="SOWA",
    page_icon="📚",
    layout="wide",
)

# Initialise the database before any of the save/get helpers imported from
# the `database` module are used further down.
init_db()
# Custom CSS
def local_css(file_name):
    """Inject the contents of a CSS file into the page inside a ``<style>`` tag.

    Args:
        file_name: Path of the stylesheet to read.

    Raises:
        OSError: If the file cannot be opened (for example, it does not exist).
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding (the default when `encoding` is omitted).
    with open(file_name, "r", encoding="utf-8") as f:
        css = f.read()
    # The raw <style> element is passed through via unsafe_allow_html=True.
    st.markdown(f'<style>{css}</style>', unsafe_allow_html=True)
# Apply the project stylesheet.
local_css("style.css")

# Sidebar: branding text plus the PDF uploader.
# NOTE(review): nesting was reconstructed from a listing that had lost its
# indentation; the uploader is assumed to belong to the sidebar - confirm.
st.sidebar.title("SOWA")
st.sidebar.markdown("Unlock the knowledge within your textbooks! 🚀")
uploaded_files = st.sidebar.file_uploader(
    "Choose up to 3 PDF textbooks",
    type="pdf",
    accept_multiple_files=True,
)
# Per-session store of processed textbooks, keyed by uploaded file name.
# Created only once so that reruns of the script keep earlier results.
if "textbooks" not in st.session_state:
    st.session_state["textbooks"] = {}
# Main content
st.title("📚 SOWA")

# Upper bound on how many uploaded PDFs are processed in a single run.
MAX_TEXTBOOKS = 3

if uploaded_files:
    extra_count = len(uploaded_files) - MAX_TEXTBOOKS
    if extra_count > 0:
        # Uploads beyond the limit used to be dropped without any feedback.
        st.warning(
            f"Only the first {MAX_TEXTBOOKS} textbooks are processed; "
            f"{extra_count} extra file(s) were ignored."
        )

    for uploaded in uploaded_files[:MAX_TEXTBOOKS]:
        # Files already processed in this session are not re-indexed.
        if uploaded.name in st.session_state.textbooks:
            continue

        with st.spinner(f"Processing {uploaded.name}..."):
            # Extract text
            text = extract_text_from_pdf(uploaded)

            # Create hierarchical index
            index = create_hierarchical_index(text)

            # Create FAISS index over the content of the top-level nodes
            faiss_index, indexed_texts = create_faiss_index(
                [node.content for node in index.children]
            )

            # Convert the index once; the same dict feeds both the session
            # entry and the JSON written to the database.
            index_dict = index_to_dict(index)

            st.session_state.textbooks[uploaded.name] = {
                "text": text,
                "index": index_dict,
                "faiss_index": faiss_index,
                "indexed_texts": indexed_texts,
            }

            # Save textbook structure to database
            save_textbook_structure(uploaded.name, json.dumps(index_dict))

        st.success(f"{uploaded.name} processed successfully!")
# Textbook selection and querying
# NOTE(review): nesting below was reconstructed from a listing that had lost
# its indentation. The structure/history expanders are assumed to sit inside
# `if st.session_state.textbooks:` but outside `if query:` - confirm.

# Longest context excerpt (in characters) shown before it is truncated.
EXCERPT_CHARS = 300

if st.session_state.textbooks:
    col1, col2 = st.columns([1, 2])

    with col1:
        selected_textbook = st.selectbox(
            "Select a textbook:", list(st.session_state.textbooks.keys())
        )

    with col2:
        query = st.text_input(
            "Enter your question:", placeholder="What would you like to know?"
        )

    if query:
        request_key = (selected_textbook, query)
        cached = st.session_state.get("_last_query_result")

        if cached is not None and cached["key"] == request_key:
            # The script reruns on every widget interaction. Reuse the result
            # for an unchanged (textbook, question) pair so the question is
            # not answered again and no duplicate history row is written.
            relevant_texts = cached["relevant_texts"]
            answer = cached["answer"]
        else:
            book = st.session_state.textbooks[selected_textbook]
            with st.spinner("🔍 Searching for answers..."):
                # Retrieve relevant text
                relevant_texts = retrieve_relevant_text(
                    query,
                    book["faiss_index"],
                    book["indexed_texts"],
                )

                # Combine relevant texts
                context = " ".join(relevant_texts)

                # Answer question
                answer = answer_question(query, context)

                # Save query to database
                save_query(selected_textbook, query, answer)

            st.session_state["_last_query_result"] = {
                "key": request_key,
                "relevant_texts": relevant_texts,
                "answer": answer,
            }

        # Display answer
        st.markdown("## 💡 Answer")
        st.info(answer)

        # Display relevant context
        with st.expander("📖 Relevant Context", expanded=False):
            for i, excerpt in enumerate(relevant_texts, 1):
                st.markdown(f"**Excerpt {i}:**")
                # Add the ellipsis only when the excerpt was actually cut.
                if len(excerpt) > EXCERPT_CHARS:
                    st.write(excerpt[:EXCERPT_CHARS] + "...")
                else:
                    st.write(excerpt)
                st.markdown("---")

    # Display textbook structure
    with st.expander("📑 Textbook Structure", expanded=False):
        structure = get_textbook_structure(selected_textbook)
        if structure:
            structure_dict = json.loads(structure)
            st.write(f"### Chapters in {selected_textbook}")
            # A node stored without a 'children' key is treated as a leaf
            # rather than raising KeyError.
            for i, chapter in enumerate(structure_dict.get("children", []), 1):
                st.write(f"{i}. {chapter['content']}")
                for j, section in enumerate(chapter.get("children", []), 1):
                    st.write(f" {i}.{j} {section['content']}")

    # Display query history
    with st.expander("📜 Query History", expanded=False):
        history = get_query_history()
        # Each entry is read positionally: textbook, query, answer, timestamp.
        for entry in history:
            st.write(f"**Textbook:** {entry[0]}")
            st.write(f"**Query:** {entry[1]}")
            st.write(f"**Answer:** {entry[2]}")
            st.write(f"**Timestamp:** {entry[3]}")
            st.markdown("---")
else:
    st.info("👆 Please upload at least one textbook to start querying.")
# Footer: a horizontal rule followed by the author credit.
for footer_line in ("---", "Made with ❤️ by Mansi"):
    st.markdown(footer_line)