mo-456 committed on
Commit
cd85d8c
·
verified ·
1 Parent(s): 25568ee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -38
app.py CHANGED
@@ -1,43 +1,48 @@
1
- from langchain_community.document_loaders import TextLoader
2
- from langchain.text_splitter import CharacterTextSplitter
3
- from langchain_community.embeddings import HuggingFaceEmbeddings
4
- from langchain_community.vectorstores import Chroma
5
- from langchain_community.llms import HuggingFaceHub
6
- from langchain.chains import RetrievalQA
7
  import gradio as gr
8
-
9
- # 1. Load plain text file (Arabic)
10
- loader = TextLoader("knowledge.txt", encoding="utf-8")
11
- docs = loader.load()
12
-
13
- # 2. Split into small chunks
14
- text_splitter = CharacterTextSplitter(separator="\n", chunk_size=500, chunk_overlap=50)
15
- documents = text_splitter.split_documents(docs)
16
-
17
- # 3. Create multilingual embeddings
18
- embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
19
-
20
- # 4. Use Chroma vector store (instead of FAISS)
21
- db = Chroma.from_documents(documents, embeddings)
22
- retriever = db.as_retriever()
23
-
24
- # 5. Load LLM from Hugging Face (free)
25
- llm = HuggingFaceHub(
26
- repo_id="tiiuae/falcon-7b-instruct",
27
- model_kwargs={"temperature": 0.3, "max_new_tokens": 200}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  )
29
 
30
- # 6. Create QA chain
31
- qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
32
 
33
- # 7. Gradio interface
34
- def answer_question(question):
35
- return qa_chain.run(question)
36
 
37
- gr.Interface(
38
- fn=answer_question,
39
- inputs=gr.Textbox(label="اكتب سؤالك هنا", lines=2),
40
- outputs=gr.Textbox(label="الإجابة"),
41
- title="شات بوت معرفي عربي",
42
- description="أدخل سؤالك حول محتوى ملف المعرفة بالعربية.",
43
- ).launch(share=True)
 
1
+ from fastapi import FastAPI
 
 
 
 
 
2
  import gradio as gr
3
+ from sentence_transformers import SentenceTransformer, util
4
+ import torch
5
+
6
+ # Load Arabic sentence transformer model
7
+ model = SentenceTransformer("CAMeL-Lab/bert-base-arabic-camelbert-ca")
8
+
9
+ # Load and preprocess knowledge base
10
+ def load_knowledge(file_path):
11
+ with open(file_path, "r", encoding="utf-8") as f:
12
+ content = f.read()
13
+ passages = [p.strip() for p in content.split("\n\n") if p.strip()]
14
+ embeddings = model.encode(passages, convert_to_tensor=True)
15
+ return passages, embeddings
16
+
17
+ passages, passage_embeddings = load_knowledge("knowledge.txt")
18
+
19
+ # Search function
20
+ def answer_question_arabic(query):
21
+ query_embedding = model.encode(query, convert_to_tensor=True)
22
+ scores = util.cos_sim(query_embedding, passage_embeddings)[0]
23
+ top_idx = torch.argmax(scores).item()
24
+ best_score = scores[top_idx].item()
25
+
26
+ if best_score < 0.4:
27
+ return "عذرًا، لم أتمكن من العثور على إجابة مناسبة في قاعدة المعرفة."
28
+ return passages[top_idx]
29
+
30
+ # Gradio UI
31
+ demo = gr.Interface(
32
+ fn=answer_question_arabic,
33
+ inputs=gr.Textbox(label="اكتب سؤالك هنا", placeholder="ما هي أهداف التنمية المستدامة؟"),
34
+ outputs=gr.Textbox(label="إجابة"),
35
+ title="روبوت المعرفة - التنمية المستدامة",
36
+ description="أجب عن الأسئلة باللغة العربية بناءً على قاعدة معرفية من وزارة المالية حول التنمية المستدامة والموازنة التشاركية."
37
  )
38
 
39
+ # FastAPI app
40
+ app = FastAPI()
41
 
42
+ @app.get("/")
43
+ def read_root():
44
+ return {"message": "مرحبا بك! انتقل إلى /gradio لبدء التفاعل."}
45
 
46
+ @app.get("/gradio")
47
+ def launch_gradio():
48
+ return gr.mount_gradio_app(app, demo, path="/gradio")